1 RDA & Mantel

1.1 Setup

library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-7
library(ggplot2)
library(ggpubr)
#BiocManager::install("DESeq2")
library("DESeq2")
## Warning: package 'DESeq2' was built under R version 4.0.3
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.0.3
## Loading required package: stats4
## Loading required package: BiocGenerics
## Warning: package 'BiocGenerics' was built under R version 4.0.5
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 4.0.3
## Loading required package: GenomicRanges
## Warning: package 'GenomicRanges' was built under R version 4.0.3
## Loading required package: GenomeInfoDb
## Warning: package 'GenomeInfoDb' was built under R version 4.0.5
## Loading required package: SummarizedExperiment
## Warning: package 'SummarizedExperiment' was built under R version 4.0.3
## Loading required package: MatrixGenerics
## Warning: package 'MatrixGenerics' was built under R version 4.0.3
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Warning: package 'Biobase' was built under R version 4.0.3
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(phyloseq)
## Warning: package 'phyloseq' was built under R version 4.0.3
## 
## Attaching package: 'phyloseq'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     distance
## The following object is masked from 'package:Biobase':
## 
##     sampleNames
## The following object is masked from 'package:GenomicRanges':
## 
##     distance
## The following object is masked from 'package:IRanges':
## 
##     distance
#setwd("nicfall_drive/Moorea_revisions/host-microbes/")
#setwd("~/Google Drive (nicfall@bu.edu)/Moorea_revisions/host-microbes")
setwd("~/nicfall@bu.edu - Google Drive/My Drive/Moorea_revisions/host-microbes")

1.2 2b-RAD x 16S

1.2.1 RDA

# reading genetic distances matrix
ibs <- as.matrix(read.table("clresult.ibsMat"))

bam.ids <- read.table("bamscl copy")
#remove extra crap from ids
bam.ids$V2 <- gsub(".trim.bt2.bam","",bam.ids$V1)
bam.ids$V2 <- gsub("I","-B",bam.ids$V2)
bam.ids$V2 <- gsub("O","-F",bam.ids$V2)
bam.ids$V2 <- gsub("T","TNW",bam.ids$V2)

row.names(ibs) <- c(bam.ids$V2)
colnames(ibs) <- c(bam.ids$V2)

# computing ordination
ibsord=capscale(as.dist(ibs)~1)

# plotting (for fun)
plot(ibsord)

# check which eigenvectors are interesting
plot(ibsord$CA$eig/sum(ibsord$CA$eig))

# choosing only the interesting eigenvectors to correlate with gene expression
ibs.scores=scores(ibsord,choices=c(1:4),"sites")

Processing 16s table

load("ps.trim copy.RData")

diagdds = phyloseq_to_deseq2(ps.trim, ~zone)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
diagdds
## class: DESeqDataSet 
## dim: 223 92 
## metadata(1): version
## assays(1): counts
## rownames(223): sq1 sq2 ... sq325 sq327
## rowData names(0):
## colnames(92): A1 A10 ... H8 H9
## colData names(9): row col ... zone site_zone
diagdds = estimateSizeFactors(diagdds)
diagdds = estimateDispersions(diagdds)
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
diagvst = getVarianceStabilizedData(diagdds)
dim(diagvst)
## [1] 223  92
# reading gene expression table (vsd) - can be a variance-stabilized
# OTU counts table
ge <- t(diagvst)

#sample data - has to match the ibs names
samdf <- data.frame(ps.trim@sam_data)
samdf$ibsnames <- paste(samdf$site_zone,"_",samdf$sample)
samdf$ibsnames <- gsub(" ","",samdf$ibsnames)

row.names(samdf) <- samdf$id
ge.names <- merge(samdf,ge,by=0)
row.names(ge.names) <- ge.names$ibsnames

ge.names.match <- ge.names[,12:234]

#subset the ones that match
ibs.scores.subset <- ibs.scores[row.names(ibs.scores) %in% c(samdf$ibsnames),]
ge.subset <- ge.names.match[row.names(ge.names.match) %in% c(row.names(ibs.scores.subset)),]

# testing how much GE variation is explained by IBS eigenvectors
row.names(ge.subset) == row.names(ibs.scores.subset)
##  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
ge.subset.arr <- ge.subset[order(as.character(row.names(ge.subset))),]
ibs.scores.arr <- ibs.scores.subset[order(as.character(row.names(ibs.scores.subset))),]
row.names(ge.subset.arr) == row.names(ibs.scores.arr)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
ge.ibs <- rda(ge.subset.arr~ibs.scores.arr)

# plotting correlation of ibs.scores with gene expression ordination
plot(rda(ge.subset.arr~ibs.scores.arr))

# stats
anova(ge.ibs,step=1000,perm.max=10000) #no dice
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = ge.subset.arr ~ ibs.scores.arr)
##          Df Variance      F Pr(>F)
## Model     4    40.49 1.0135  0.429
## Residual 79   788.96

1.2.2 Mantel

#following instructions from here:
#https://jkzorz.github.io/2019/07/08/mantel-test.html
#& here:
#https://www.rdocumentation.org/packages/ape/versions/5.4-1/topics/mantel.test

otu.dist <- vegdist(ge.subset.arr, method="bray")
## Warning in vegdist(ge.subset.arr, method = "bray"): results may be meaningless
## because data have negative entries in method "bray"
ibs.subset1 <- ibs[row.names(ibs) %in% c(samdf$ibsnames),]
ibs.subset <- ibs.subset1[,colnames(ibs.subset1) %in% c(samdf$ibsnames)]
row.names(ge.subset.arr) == row.names(ibs.subset)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
ibs.dist <- as.matrix(ibs.subset)

host.bac.mant <- mantel(ibs.dist,otu.dist, method="spearman",permutations=999,na.rm=TRUE)
host.bac.mant #no dice
## 
## Mantel statistic based on Spearman's rank correlation rho 
## 
## Call:
## mantel(xdis = ibs.dist, ydis = otu.dist, method = "spearman",      permutations = 999, na.rm = TRUE) 
## 
## Mantel statistic r: -0.01399 
##       Significance: 0.599 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0783 0.0955 0.1122 0.1338 
## Permutation: free
## Number of permutations: 999

1.3 2b-RAD x ITS2

1.3.1 RDA

ps.its <- readRDS("ps.its2 copy.RDS")
otu.its <- data.frame(ps.its@otu_table)

# diagdds = phyloseq_to_deseq2(ps.its, ~zone)
# diagdds
# 
# diagdds = estimateSizeFactors(diagdds)
# diagdds = estimateDispersions(diagdds)
# diagvst = getVarianceStabilizedData(diagdds)
# dim(diagvst)

# reading gene expression table (vsd) - can be a variance-stabilized
# OTU counts table
its.t <- t(otu.its)

#sample data - has to match the ibs names
samdf.its <- data.frame(ps.its@sam_data)
samdf.its$sample_full <- gsub(" ","_",samdf.its$sample_full)

#row.names(samdf.its) <- samdf.its$sample_full
its.names <- merge(samdf.its,otu.its,by=0)
row.names(its.names) <- its.names$sample_full

its.names.match <- its.names[,13:21]

#subset the ones that match
ibs.scores.sub.its <- ibs.scores[row.names(ibs.scores) %in% c(samdf.its$sample_full),]
its.subset <- its.names.match[row.names(its.names.match) %in% c(row.names(ibs.scores.sub.its)),]

# testing how much GE variation is explained by IBS eigenvectors
row.names(its.subset) == row.names(ibs.scores.sub.its)
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE
its.subset.arr <- its.subset[order(as.character(row.names(its.subset))),]
ibs.scores.arr.its <- ibs.scores.sub.its[order(as.character(row.names(ibs.scores.sub.its))),]
row.names(its.subset.arr) == row.names(ibs.scores.arr.its)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
its.ibs <- rda(its.subset.arr~ibs.scores.arr.its)

# plotting correlation of ibs.scores with gene expression ordination
plot(rda(its.subset.arr~ibs.scores.arr.its))

# stats
anova(its.ibs,step=1000,perm.max=10000) #no dice
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = its.subset.arr ~ ibs.scores.arr.its)
##          Df Variance      F Pr(>F)
## Model     4   267778 0.5643  0.877
## Residual 80  9489899

1.3.2 Mantel

#following instructions from here:
#https://jkzorz.github.io/2019/07/08/mantel-test.html
#& here:
#https://www.rdocumentation.org/packages/ape/versions/5.4-1/topics/mantel.test

its.dist <- vegdist(its.subset.arr, method="bray")

ibs.subset1.its <- ibs[row.names(ibs) %in% c(samdf.its$sample_full),]
ibs.subset.its <- ibs.subset1.its[,colnames(ibs.subset1.its) %in% c(samdf.its$sample_full)]
row.names(its.subset.arr) == row.names(ibs.subset.its)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
ibs.dist <- as.matrix(ibs.subset.its)

host.sym.mant <- mantel(ibs.dist,its.dist, method="spearman",permutations=999,na.rm=TRUE)
host.sym.mant #no dice
## 
## Mantel statistic based on Spearman's rank correlation rho 
## 
## Call:
## mantel(xdis = ibs.dist, ydis = its.dist, method = "spearman",      permutations = 999, na.rm = TRUE) 
## 
## Mantel statistic r: -0.02606 
##       Significance: 0.819 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0358 0.0480 0.0553 0.0639 
## Permutation: free
## Number of permutations: 999

1.4 16S x ITS2

1.4.1 RDA

ps.its <- readRDS("ps.its2 copy.RDS")
load("ps.trim copy.Rdata")

its.otu <- data.frame(ps.its@otu_table)

bac.otu <- data.frame(ps.trim@otu_table)
samdf.bac <- data.frame(ps.trim@sam_data)
#sample names have to match
row.names(bac.otu) == row.names(samdf.bac)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [91] TRUE TRUE
row.names(bac.otu) <- samdf.bac$sample

#subset the ones that match
its.sub <- its.otu[row.names(its.otu) %in% row.names(bac.otu),]
bac.sub <- bac.otu[row.names(bac.otu) %in% row.names(its.sub),]

# testing how much GE variation is explained by IBS eigenvectors
row.names(its.sub) == row.names(bac.sub)
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE
its.sub.arr <- its.sub[order(as.character(row.names(its.sub))),]
bac.sub.arr <- bac.sub[order(as.character(row.names(bac.sub))),]

row.names(its.sub.arr) == row.names(bac.sub.arr)
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
its.dist <- vegdist(its.sub.arr)
itsord=capscale(as.dist(its.dist)~1)

# plotting (for fun)
plot(itsord)

# check which eigenvectors are interesting
plot(itsord$CA$eig/sum(itsord$CA$eig))

# choosing only the interesting eigenvectors to correlate with gene expression
itsord.scores=scores(itsord,choices=c(1:4),"sites")

its.bac.rda <- rda(bac.sub.arr~itsord.scores)

# plotting correlation of ibs.scores with gene expression ordination
plot(its.bac.rda)

# stats
anova(its.bac.rda,step=1000,perm.max=10000) #no dice
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = bac.sub.arr ~ itsord.scores)
##          Df Variance      F Pr(>F)
## Model     4  6256273 1.6187  0.113
## Residual 84 81167242

1.4.2 Mantel

# stats
its.dist <- vegdist(its.sub.arr, method="bray")
bac.dist <- vegdist(bac.sub.arr, method="bray")

bac.sym.mant <- mantel(bac.dist,its.dist, method="spearman",permutations=999,na.rm=TRUE)
bac.sym.mant
## 
## Mantel statistic based on Spearman's rank correlation rho 
## 
## Call:
## mantel(xdis = bac.dist, ydis = its.dist, method = "spearman",      permutations = 999, na.rm = TRUE) 
## 
## Mantel statistic r: 0.03884 
##       Significance: 0.105 
## 
## Upper quantiles of permutations (null model):
##    90%    95%  97.5%    99% 
## 0.0409 0.0522 0.0636 0.0848 
## Permutation: free
## Number of permutations: 999

2 Metric correlations

2.1 Setup

library(bestNormalize)
#install.packages("PerformanceAnalytics")
library("PerformanceAnalytics")
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following object is masked from 'package:S4Vectors':
## 
##     first
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(ggplot2)
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
## 
##     get_legend
library(car)
## Loading required package: carData
library(nlme)
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:IRanges':
## 
##     collapse
setwd("~/Google Drive (nicfall@bu.edu)/Moorea_revisions/host-microbes")

Read in data files, designate factors

mast <- read.csv('mr_new_master_meta.csv',na.strings=c("","NA"),check.names=FALSE)

mast$site <- as.factor(mast$site)
mast$zone <- as.factor(mast$zone)
mast$site_zone <- as.factor(mast$site_zone)
mast$sym_clade <- as.factor(mast$sym_clade)
mast$sym_type <- as.factor(mast$sym_type)
mast$sym_type_top <- as.factor(mast$sym_type_top)

str(mast)
## 'data.frame':    143 obs. of  16 variables:
##  $ Sample name     : chr  "MNW-B_51" "MNW-B_53" "MNW-B_55" "MNW-B_56" ...
##  $ sample_num      : chr  "51" "53" "55" "56" ...
##  $ site            : Factor w/ 3 levels "MNW","MSE","TNW": 1 1 1 1 1 1 1 1 1 1 ...
##  $ zone            : Factor w/ 2 levels "Back reef","Fore reef": 1 1 1 1 1 1 1 1 1 1 ...
##  $ site_zone       : Factor w/ 6 levels "MNW-B","MNW-F",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ 2b-RAD sequencer: chr  "HiSeq4000" "HiSeq4000" "HiSeq4000" "HiSeq4000" ...
##  $ size_cm2        : num  1244 371 249 441 514 ...
##  $ host_het        : num  0.00327 0.00311 0.00359 0.00395 NA ...
##  $ sym_type        : Factor w/ 9 levels "A1.A1ee","A1.A1gf.A1g",..: 4 NA 4 NA NA 3 NA 4 4 4 ...
##  $ sym_clade       : Factor w/ 2 levels "A","C": 2 NA 2 NA NA 1 NA 2 2 2 ...
##  $ sym_type_top    : Factor w/ 6 levels "A1","A3","C3ae",..: 3 NA 3 NA NA 2 NA 3 3 3 ...
##  $ bac_shannon     : num  1.43 NA 1.98 NA NA ...
##  $ bac_simpson     : num  2.58 NA 3.77 NA NA ...
##  $ bac_rich        : int  57 NA 50 NA NA 43 NA 76 85 43 ...
##  $ bac_even        : num  0.354 NA 0.506 NA NA ...
##  $ bac_phylo       : num  18.4 NA 13.9 NA NA ...
#by site
mast.mnw <- subset(mast,site=="MNW")
mast.mse <- subset(mast,site=="MSE")
mast.tnw <- subset(mast,site=="TNW")

2.2 Host metrics correlations

Nada

plot(host_het~site_zone,data=mast)

plot(size_cm2~site_zone,data=mast)

plot(host_het~size_cm2,data=mast)

plot(log(host_het)~log(size_cm2),data=mast)
abline(lm(log(host_het)~log(bac_rich),data=mast))

shapiro.test(mast$size_cm2)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$size_cm2
## W = 0.52031, p-value < 2.2e-16
shapiro.test(mast$host_het)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$host_het
## W = 0.93948, p-value = 6.15e-05
best.size <- bestNormalize(mast$size_cm2)
mast$size.t <- best.size$x.t
plot(size.t~site_zone,data=mast)

best.het <- bestNormalize(mast$host_het)
mast$het.t <- best.het$x.t

a1 <- aov(size.t~site/zone,data=mast)
TukeyHSD(a1)#tahiti different than other two
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = size.t ~ site/zone, data = mast)
## 
## $site
##               diff        lwr          upr     p adj
## MSE-MNW -0.3898905 -0.9001095  0.120328436 0.1690691
## TNW-MNW -0.9014607 -1.3088958 -0.494025636 0.0000023
## TNW-MSE -0.5115702 -1.0143403 -0.008800031 0.0451650
## 
## $`site:zone`
##                                   diff        lwr         upr     p adj
## MSE:Back reef-MNW:Back reef -0.9610373 -1.6722503 -0.24982432 0.0021230
## TNW:Back reef-MNW:Back reef -1.0388392 -1.7417336 -0.33594479 0.0005590
## MNW:Fore reef-MNW:Back reef -1.1994082 -1.9196607 -0.47915577 0.0000667
## MSE:Fore reef-MNW:Back reef         NA         NA          NA        NA
## TNW:Fore reef-MNW:Back reef -1.8702284 -2.5583234 -1.18213339 0.0000000
## TNW:Back reef-MSE:Back reef -0.0778019 -0.7890149  0.63341110 0.9995576
## MNW:Fore reef-MSE:Back reef -0.2383709 -0.9667437  0.49000190 0.9322267
## MSE:Fore reef-MSE:Back reef         NA         NA          NA        NA
## TNW:Fore reef-MSE:Back reef -0.9091911 -1.6057814 -0.21260075 0.0033699
## MNW:Fore reef-TNW:Back reef -0.1605690 -0.8808215  0.55968345 0.9870251
## MSE:Fore reef-TNW:Back reef         NA         NA          NA        NA
## TNW:Fore reef-TNW:Back reef -0.8313892 -1.5194842 -0.14329417 0.0085286
## MSE:Fore reef-MNW:Fore reef         NA         NA          NA        NA
## TNW:Fore reef-MNW:Fore reef -0.6708202 -1.3766372  0.03499692 0.0724064
## TNW:Fore reef-MSE:Fore reef         NA         NA          NA        NA
a1 <- aov(het.t~site/zone,data=mast)
TukeyHSD(a1)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = het.t ~ site/zone, data = mast)
## 
## $site
##               diff        lwr        upr     p adj
## MSE-MNW -0.5564720 -1.1150277 0.00208358 0.0510867
## TNW-MNW  0.1631006 -0.3681829 0.69438406 0.7465281
## TNW-MSE  0.7195726  0.2199059 1.21923927 0.0025100
## 
## $`site:zone`
##                                    diff        lwr        upr     p adj
## MSE:Back reef-MNW:Back reef -0.36976674 -1.3800907  0.6405572 0.8952596
## TNW:Back reef-MNW:Back reef  0.33333940 -0.6225580  1.2892368 0.9130871
## MNW:Fore reef-MNW:Back reef -0.01558811 -1.0382974  1.0071212 1.0000000
## MSE:Fore reef-MNW:Back reef -0.75055238 -1.7496645  0.2485598 0.2558173
## TNW:Fore reef-MNW:Back reef -0.03259056 -0.9957615  0.9305804 0.9999987
## TNW:Back reef-MSE:Back reef  0.70310614 -0.1624007  1.5686130 0.1809483
## MNW:Fore reef-MSE:Back reef  0.35417863 -0.5845953  1.2929526 0.8825268
## MSE:Fore reef-MSE:Back reef -0.38078565 -1.2937956  0.5322243 0.8310626
## TNW:Fore reef-MSE:Back reef  0.33717618 -0.5363571  1.2107095 0.8721230
## MNW:Fore reef-TNW:Back reef -0.34892751 -1.2288604  0.5310054 0.8588682
## MSE:Fore reef-TNW:Back reef -1.08389178 -1.9362841 -0.2314995 0.0046540
## TNW:Fore reef-TNW:Back reef -0.36592995 -1.1758966  0.4440367 0.7784652
## MSE:Fore reef-MNW:Fore reef -0.73496427 -1.6616612  0.1917326 0.2026298
## TNW:Fore reef-MNW:Fore reef -0.01700244 -0.9048315  0.8708266 0.9999999
## TNW:Fore reef-MSE:Fore reef  0.71796183 -0.1425793  1.5785030 0.1583426
#plot(het.t~size.t,data=mast)

ggplot(data=mast,aes(x=log(size_cm2),y=log(host_het),color=zone))+
  geom_point()+
  facet_wrap(~site)+
  stat_smooth(method="lm")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 51 rows containing non-finite values (stat_smooth).
## Warning: Removed 51 rows containing missing values (geom_point).

shapiro.test(mast$size.t)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$size.t
## W = 0.98683, p-value = 0.3641
shapiro.test(mast$het.t)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$het.t
## W = 0.97885, p-value = 0.06799
l1 <- lm(size.t~het.t+site/zone,data=mast)
summary(l1) 
## 
## Call:
## lm(formula = size.t ~ het.t + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.83868 -0.48725  0.03821  0.46905  1.67970 
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            1.20436    0.22026   5.468 4.39e-07 ***
## het.t                  0.02525    0.08864   0.285 0.776404    
## siteMSE               -1.08904    0.29056  -3.748 0.000322 ***
## siteTNW               -1.23000    0.27988  -4.395 3.15e-05 ***
## siteMNW:zoneFore reef -1.33356    0.29663  -4.496 2.15e-05 ***
## siteMSE:zoneFore reef       NA         NA      NA       NA    
## siteTNW:zoneFore reef -0.85620    0.23981  -3.570 0.000586 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7932 on 86 degrees of freedom
##   (51 observations deleted due to missingness)
## Multiple R-squared:  0.4063, Adjusted R-squared:  0.3718 
## F-statistic: 11.77 on 5 and 86 DF,  p-value: 1.107e-08
l1 <- lm(het.t~size.t+site/zone,data=mast)
summary(l1) 
## 
## Call:
## lm(formula = het.t ~ size.t + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.16621 -0.71025 -0.04741  0.64329  2.25528 
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)            0.07711    0.31081   0.248    0.805
## size.t                 0.03734    0.13106   0.285    0.776
## siteMSE               -0.32875    0.37943  -0.866    0.389
## siteTNW                0.45784    0.37336   1.226    0.223
## siteMNW:zoneFore reef -0.13651    0.40058  -0.341    0.734
## siteMSE:zoneFore reef       NA         NA      NA       NA
## siteTNW:zoneFore reef -0.41250    0.30929  -1.334    0.186
## 
## Residual standard error: 0.9645 on 86 degrees of freedom
##   (51 observations deleted due to missingness)
## Multiple R-squared:  0.07874,    Adjusted R-squared:  0.02518 
## F-statistic:  1.47 on 5 and 86 DF,  p-value: 0.208
l1 <- lm(host_het~size_cm2+site/zone,data=mast)
summary(l1) 
## 
## Call:
## lm(formula = host_het ~ size_cm2 + site/zone, data = mast)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -6.031e-04 -2.280e-04 -2.632e-05  1.627e-04  9.552e-04 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            3.814e-03  1.201e-04  31.755   <2e-16 ***
## size_cm2              -2.625e-07  1.285e-07  -2.042   0.0442 *  
## siteMSE               -2.587e-04  1.282e-04  -2.018   0.0467 *  
## siteTNW                8.201e-06  1.251e-04   0.066   0.9479    
## siteMNW:zoneFore reef -2.184e-04  1.347e-04  -1.622   0.1086    
## siteMSE:zoneFore reef         NA         NA      NA       NA    
## siteTNW:zoneFore reef -1.861e-04  9.462e-05  -1.967   0.0524 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0003145 on 86 degrees of freedom
##   (51 observations deleted due to missingness)
## Multiple R-squared:  0.1273, Adjusted R-squared:  0.07654 
## F-statistic: 2.509 on 5 and 86 DF,  p-value: 0.03606
ggplot(data=mast,aes(x=log(host_het),y=log(size_cm2),color=site))+
  geom_point()+
  facet_wrap(~zone)+
  stat_smooth(method="lm")+
  theme_cowplot()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 51 rows containing non-finite values (stat_smooth).

## Warning: Removed 51 rows containing missing values (geom_point).

#by site
mast.mnw <- subset(mast,site=="MNW")
mast.mse <- subset(mast,site=="MSE")
mast.tnw <- subset(mast,site=="TNW")

l1 <- lm(host_het~size_cm2,data=mast.mnw)
summary(l1) #ns
## 
## Call:
## lm(formula = host_het ~ size_cm2, data = mast.mnw)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -5.388e-04 -2.020e-04 -7.810e-06  2.911e-04  6.030e-04 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.678e-03  7.161e-05  51.368   <2e-16 ***
## size_cm2    -2.188e-07  1.177e-07  -1.859    0.074 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0003112 on 27 degrees of freedom
##   (18 observations deleted due to missingness)
## Multiple R-squared:  0.1134, Adjusted R-squared:  0.08059 
## F-statistic: 3.454 on 1 and 27 DF,  p-value: 0.07401
l1 <- lm(het.t~size.t,data=mast.mse) #no zone info for size
summary(l1) #ns
## 
## Call:
## lm(formula = het.t ~ size.t, data = mast.mse)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2806 -0.6072 -0.0118  0.5513  1.7386 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -0.2660     0.1888  -1.409    0.178
## size.t        0.1688     0.2190   0.771    0.452
## 
## Residual standard error: 0.7947 on 16 degrees of freedom
##   (26 observations deleted due to missingness)
## Multiple R-squared:  0.03583,    Adjusted R-squared:  -0.02443 
## F-statistic: 0.5945 on 1 and 16 DF,  p-value: 0.4519
l1 <- lm(het.t~size.t+zone,data=mast.tnw)
summary(l1) #ns
## 
## Call:
## lm(formula = het.t ~ size.t + zone, data = mast.tnw)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4298 -0.7081 -0.1531  0.7204  2.2100 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     0.5361     0.2053   2.611   0.0125 *
## size.t          0.1318     0.1763   0.748   0.4589  
## zoneFore reef  -0.3305     0.3254  -1.016   0.3155  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.963 on 42 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.06582,    Adjusted R-squared:  0.02133 
## F-statistic:  1.48 on 2 and 42 DF,  p-value: 0.2394
tnw.host <- mast.tnw[complete.cases(mast.tnw$host_het),]
tnw.host.host <- tnw.host[complete.cases(tnw.host$size_cm2),]

a1 <- aov(host_het~size_cm2+Error(zone),data=tnw.host.host)
summary(a1) #ns
## 
## Error: zone
##          Df    Sum Sq   Mean Sq
## size_cm2  1 2.903e-07 2.903e-07
## 
## Error: Within
##           Df    Sum Sq   Mean Sq F value Pr(>F)
## size_cm2   1 9.000e-09 9.310e-09   0.079   0.78
## Residuals 42 4.962e-06 1.181e-07
mnw.host <- mast.mnw[complete.cases(mast.mnw$host_het),]
mnw.host.host <- mnw.host[complete.cases(mnw.host$size_cm2),]

a1 <- aov(het.t~size.t+Error(zone),data=mnw.host.host)
summary(a1) #ns
## 
## Error: zone
##        Df Sum Sq Mean Sq
## size.t  1 0.2494  0.2494
## 
## Error: Within
##           Df Sum Sq Mean Sq F value Pr(>F)
## size.t     1   1.53    1.53   1.391  0.249
## Residuals 26  28.61    1.10
#install.packages("mblm")
library(mblm)
model.k <- mblm(host_het ~ size_cm2, data=mnw.host.host)
summary(model.k)
## 
## Call:
## mblm(formula = host_het ~ size_cm2, dataframe = mnw.host.host)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -6.731e-04 -2.823e-04 -6.775e-05  2.242e-04  5.047e-04 
## 
## Coefficients:
##              Estimate       MAD V value Pr(>|V|)    
## (Intercept) 3.653e-03 1.305e-04     435 3.73e-09 ***
## size_cm2    2.170e-08 6.553e-07     271    0.256    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0003403 on 27 degrees of freedom

2.3 Host x sym

Nada

plot(host_het~sym_clade,data=mast) #ns

#there's more variation with the C but I think that's just number of samples things
plot(host_het~sym_type,data=mast) #ns

plot(het.t~sym_type,data=mast) #ns

plot(host_het~sym_type_top,data=mast) #ns

plot(size_cm2~sym_clade,data=mast) #ns

plot(size.t~sym_type,data=mast) #too much variation between sample sizes for this I think

plot(size_cm2~sym_type_top,data=mast) #ns

summary(mast$sym_type)
##                             A1.A1ee                         A1.A1gf.A1g 
##                                   1                                   3 
##                                  A3            C3ae.C3bj.C3f.C3.C3p.C3k 
##                                   1                                  37 
##                    C3ae.C3f.C3.C3bj C3ae.C3f.C3.C3bj.C3in.C3p.C3il.C3im 
##                                   2                                  12 
##           C3ae.C3f.C3bj.C3.C3p.C3ch                C3ae.C3g.C3f.C3.C3bj 
##                                  22                                   9 
##           C3ae.C3g.C3f.C3.C3io.C3bj                                NA's 
##                                   6                                  50
#taking out n of 1-2
lows <- c("A1.A1ee","A3","C3ae.C3f.C3.C3bj")
newdata <- mast[!mast$sym_type %in% lows,]

mast.less <- newdata[complete.cases(newdata$sym_type),]
plot(size.t~sym_type,data=mast.less) 

mast.less$sym_type <- gsub(".C","-C",mast.less$sym_type)
mast.less$sym_type <- gsub(".A","-A",mast.less$sym_type)
mast.less$sym_type <- gsub("C3ae-C3g-C3f-C3-C3bj","C3ae/C3g-C3f-C3-C3bj",mast.less$sym_type)
mast.less$sym_type <- gsub("C3ae-C3g-C3f-C3-C3io-C3bj","C3ae/C3g/C3f/C3/C3io-C3bj",mast.less$sym_type)

ggplot(mast.less,aes(x=sym_type,y=size.t,color=sym_type))+
  geom_boxplot()+
  theme_cowplot()+
  theme(axis.text.x=element_text(angle=45,hjust=1),legend.position="none")+
  xlab("Symbiont type")+
  ylab("Colony size (transf.)")
## Warning: Removed 15 rows containing non-finite values (stat_boxplot).

#ggsave("sym.size.pdf",width=5,height=5)

a1 <- aov(size.t~sym_type+site/zone,data=mast.less)
summary(a1)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## sym_type     5  21.11   4.222   8.260 4.57e-06 ***
## site         2   1.62   0.812   1.588    0.212    
## site:zone    2  13.79   6.897  13.495 1.29e-05 ***
## Residuals   64  32.71   0.511                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 15 observations deleted due to missingness
plot(a1)

library(agricolae)
## Registered S3 methods overwritten by 'klaR':
##   method      from 
##   predict.rda vegan
##   print.rda   vegan
##   plot.rda    vegan
## 
## Attaching package: 'agricolae'
## The following objects are masked from 'package:PerformanceAnalytics':
## 
##     kurtosis, skewness
a1 <- aov(size.t~sym_type,data=mast.less)

out <- HSD.test(a1,"sym_type")
out
## $statistics
##     MSerror Df       Mean       CV
##   0.7077369 68 0.03630616 2317.158
## 
## $parameters
##    test   name.t ntr StudentizedRange alpha
##   Tukey sym_type   6         4.147238  0.05
## 
## $means
##                                          size.t       std  r        Min
## A1-A1gf-A1g                         -1.04915768 0.6427173  3 -1.4746993
## C3ae-C3bj-C3f-C3-C3p-C3k             0.59115627 0.8204126 30 -0.9460429
## C3ae-C3f-C3-C3bj-C3in-C3p-C3il-C3im -0.64142138 0.9172433 12 -1.8204724
## C3ae-C3f-C3bj-C3-C3p-C3ch           -0.05881964 0.8985429 17 -1.5108395
## C3ae/C3g-C3f-C3-C3bj                -0.05826682 0.8201305  9 -1.6400258
## C3ae/C3g/C3f/C3/C3io-C3bj           -0.89305594 0.3369254  3 -1.2551285
##                                            Max        Q25         Q50
## A1-A1gf-A1g                         -0.3098213 -1.4188259 -1.36295248
## C3ae-C3bj-C3f-C3-C3p-C3k             2.0174870  0.1135068  0.51814395
## C3ae-C3f-C3-C3bj-C3in-C3p-C3il-C3im  1.3426434 -1.3463720 -0.80292938
## C3ae-C3f-C3bj-C3-C3p-C3ch            1.6659433 -0.8221743 -0.01122879
## C3ae/C3g-C3f-C3-C3bj                 0.7710662 -0.3398381  0.15801397
## C3ae/C3g/C3f/C3/C3io-C3bj           -0.5887465 -1.0452107 -0.83529281
##                                             Q75
## A1-A1gf-A1g                         -0.83638688
## C3ae-C3bj-C3f-C3-C3p-C3k             1.26174575
## C3ae-C3f-C3-C3bj-C3in-C3p-C3il-C3im -0.02964506
## C3ae-C3f-C3bj-C3-C3p-C3ch            0.31508902
## C3ae/C3g-C3f-C3-C3bj                 0.63127248
## C3ae/C3g/C3f/C3/C3io-C3bj           -0.71201964
## 
## $comparison
## NULL
## 
## $groups
##                                          size.t groups
## C3ae-C3bj-C3f-C3-C3p-C3k             0.59115627      a
## C3ae/C3g-C3f-C3-C3bj                -0.05826682     ab
## C3ae-C3f-C3bj-C3-C3p-C3ch           -0.05881964     ab
## C3ae-C3f-C3-C3bj-C3in-C3p-C3il-C3im -0.64142138      b
## C3ae/C3g/C3f/C3/C3io-C3bj           -0.89305594      b
## A1-A1gf-A1g                         -1.04915768      b
## 
## attr(,"class")
## [1] "group"
#hist(summary(mast.less$sym_type))

          #                   A1.A1ee                         A1.A1gf.A1g 
          #                         0                                   3 
          #                        A3            C3ae.C3bj.C3f.C3.C3p.C3k 
          #                         0                                  37 
          #          C3ae.C3f.C3.C3bj     C3ae.C3f.C3.C3bj.C3in.C3p.C3il.C3im 
          #                         0                                  12 
          # C3ae.C3f.C3bj.C3.C3p.C3ch                C3ae.C3g.C3f.C3.C3bj 
          #                        22                                   9 
          # C3ae.C3g.C3f.C3.C3io.C3bj 
          #                         6 

2.4 Host x bac

2.4.1 Host genetic div x bacterial diversity

shapiro.test(mast$host_het)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$host_het
## W = 0.93948, p-value = 6.15e-05
het.t <- bestNormalize(mast$host_het)
#Standardized Box Cox Transformation with 114 nonmissing obs.
mast$het.t <- het.t$x.t
shapiro.test(mast$het.t)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$het.t
## W = 0.97885, p-value = 0.06799

2.4.1.1 Evenness

shapiro.test(mast$bac_even)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$bac_even
## W = 0.98352, p-value = 0.3597
a1 <- lm(het.t~bac_even+site/zone,data=mast)
summary(a1) #p < 0.05*
## 
## Call:
## lm(formula = het.t ~ bac_even + site/zone, data = mast)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8900 -0.5866 -0.0178  0.4957  2.2542 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           -1.67115    0.63850  -2.617   0.0111 *
## bac_even               2.77751    1.08757   2.554   0.0131 *
## siteMSE                0.36463    0.40852   0.893   0.3755  
## siteTNW                0.85694    0.38872   2.205   0.0311 *
## siteMNW:zoneFore reef  0.04207    0.40174   0.105   0.9169  
## siteMSE:zoneFore reef -0.67144    0.34980  -1.920   0.0595 .
## siteTNW:zoneFore reef -0.15269    0.33987  -0.449   0.6548  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8577 on 63 degrees of freedom
##   (73 observations deleted due to missingness)
## Multiple R-squared:  0.2349, Adjusted R-squared:  0.162 
## F-statistic: 3.223 on 6 and 63 DF,  p-value: 0.007971
plot(a1)

mast.het <- mast[complete.cases(mast$het.t),]
mast.het.bac <- mast.het[complete.cases(mast.het$bac_even),]

#subset fore reef
mast.het.bac.fore <- subset(mast.het.bac,zone=="Fore reef")
shapiro.test(mast.het.bac.fore$host_het)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast.het.bac.fore$host_het
## W = 0.95932, p-value = 0.2048
shapiro.test(mast.het.bac.fore$bac_even)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast.het.bac.fore$bac_even
## W = 0.9642, p-value = 0.2887
a1 <- lm(host_het~bac_even+site,data=mast.het.bac.fore)
summary(a1) #p < 0.001***, R2 = 0.42
## 
## Call:
## lm(formula = host_het ~ bac_even + site, data = mast.het.bac.fore)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -3.724e-04 -9.672e-05 -1.669e-05  8.999e-05  5.437e-04 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.774e-03  1.777e-04  15.614  < 2e-16 ***
## bac_even     1.508e-03  3.550e-04   4.248 0.000174 ***
## siteMSE     -1.078e-04  8.274e-05  -1.303 0.201868    
## siteTNW      2.530e-04  8.746e-05   2.893 0.006813 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000202 on 32 degrees of freedom
## Multiple R-squared:  0.4712, Adjusted R-squared:  0.4216 
## F-statistic: 9.504 on 3 and 32 DF,  p-value: 0.0001223
plot(a1)

# lme1 <- lme(host_het ~ bac_even,data=mast.het.bac.fore,random=~1|site)
# summary(lme1)

#subset back reef
mast.het.bac.back <- subset(mast.het.bac,zone=="Back reef")
shapiro.test(log(mast.het.bac.back$host_het))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast.het.bac.back$host_het)
## W = 0.94907, p-value = 0.1152
shapiro.test(mast.het.bac.back$bac_even)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast.het.bac.back$bac_even
## W = 0.94005, p-value = 0.06185
a1 <- lm(host_het~bac_even+site,data=mast.het.bac.back)
summary(a1) #ns
## 
## Call:
## lm(formula = host_het ~ bac_even + site, data = mast.het.bac.back)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -5.008e-04 -2.400e-04 -5.481e-05  1.421e-04  6.883e-04 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.444e-03  3.323e-04  10.363 1.98e-11 ***
## bac_even    1.569e-04  6.026e-04   0.260    0.796    
## siteMSE     2.229e-05  1.636e-04   0.136    0.893    
## siteTNW     2.131e-04  1.527e-04   1.396    0.173    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0003291 on 30 degrees of freedom
## Multiple R-squared:  0.09235,    Adjusted R-squared:  0.001584 
## F-statistic: 1.017 on 3 and 30 DF,  p-value: 0.3987
plot(a1)

#messy stats crap

# 
# library(mblm)

# model.k <- mblm(het.t ~ bac_even, data=mast.het.bac)
# summary(model.k)
# 
# mast.het.bac.mnwf <- subset(mast.het.bac,site=="MNW"&zone=="Fore reef")
# model.k <- mblm(host_het ~ bac_even, data=mast.het.bac.mnwf)
# summary(model.k)
# a1 <- lm(het.t~bac_even,data=mast.het.bac.mnwf)
# summary(a1) #p < 0.05*
# plot(a1)
# 
# library(nlme)
# lme1 <- lme(het.t ~ bac_even,data=mast.het.bac,random=~1|zone)
# summary(lme1)

# mnwb <- subset(mast.mnw,zone=="Back reef")
# shapiro.test(mnwb$host_het)
# bestNormalize(mnwb$bac_even)
# 
# lm1 <- lm(host_het~bac_even,data=mnwb)
# layout(matrix(c(1,2,3,4),2,2)) # optional 4 graphs/page
# plot(lm1)
#all info
# gg.het.even <- ggplot(data=mast,aes(x=het.t,y=bac_even,color=site,linetype=zone,shape=zone))+
#   geom_point()+
#   stat_smooth(method="lm",se=FALSE)+
#   theme_cowplot()+
#   #xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
#   ylab("Evenness")+
#   scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
#   scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
#   scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))+
#   theme(axis.title.x=element_blank())
# gg.het.even
# 
# #wrapped by site
# gg.het.even.site <- ggplot(data=mast,aes(x=het.t,y=bac_even,color=zone))+
#   geom_point()+
#   facet_wrap(~site)+
#   stat_smooth(method="lm")+
#   theme_cowplot()
# gg.het.even.site

#wrapped by zone
gg.het.even <- ggplot(data=mast,aes(x=host_het,y=bac_even))+
  geom_point(aes(color=site,shape=site))+
  facet_wrap(~zone)+
  stat_smooth(method="lm",color="thistle4")+
  #stat_smooth(method="lm",aes(color=site,linetype=site))+
  theme_cowplot()+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  #scale_linetype_manual(name="Site",values=c("solid","dotted","dotdash"),labels=c("MNW","MSE","TNW"))+
  theme(axis.text.x = element_text(angle = 45, hjust=1))+
  scale_shape_manual(name="Site",values=c(8,4,9),labels=c("MNW","MSE","TNW"))+
  ylab("Evennness")+
  xlab("Host heterozygosity")+
  xlim(0.0030,0.00433)
gg.het.even
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).

# ggplot(data=mast,aes(x=het.t,y=bac_even))+
#   geom_point()+
#   stat_smooth(method="lm")+
#   #facet_wrap()+
#   theme_cowplot()

2.4.1.2 Shannon

shapiro.test(mast$bac_shannon)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$bac_shannon
## W = 0.9918, p-value = 0.8799
a1 <- lm(het.t~bac_shannon+site/zone,data=mast)
summary(a1) #p < 0.05
## 
## Call:
## lm(formula = het.t ~ bac_shannon + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.86200 -0.63479 -0.03502  0.55229  2.16320 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           -1.48196    0.57654  -2.570   0.0125 *
## bac_shannon            0.56438    0.22210   2.541   0.0135 *
## siteMSE                0.40134    0.41324   0.971   0.3352  
## siteTNW                0.91958    0.39489   2.329   0.0231 *
## siteMNW:zoneFore reef  0.07604    0.40387   0.188   0.8513  
## siteMSE:zoneFore reef -0.75545    0.35784  -2.111   0.0387 *
## siteTNW:zoneFore reef -0.12715    0.34141  -0.372   0.7108  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8581 on 63 degrees of freedom
##   (73 observations deleted due to missingness)
## Multiple R-squared:  0.2342, Adjusted R-squared:  0.1612 
## F-statistic:  3.21 on 6 and 63 DF,  p-value: 0.008166
#subset fore reef
shapiro.test(mast.het.bac.fore$host_het)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast.het.bac.fore$host_het
## W = 0.95932, p-value = 0.2048
shapiro.test(mast.het.bac.fore$bac_shannon) #barely good
## 
##  Shapiro-Wilk normality test
## 
## data:  mast.het.bac.fore$bac_shannon
## W = 0.94139, p-value = 0.05612
a1 <- lm(host_het~bac_shannon+site,data=mast.het.bac.fore)
summary(a1) #p < 0.001*** R2 = 0.39
## 
## Call:
## lm(formula = host_het ~ bac_shannon + site, data = mast.het.bac.fore)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -3.900e-04 -9.645e-05 -1.616e-05  9.263e-05  5.453e-04 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.959e-03  1.473e-04  20.091  < 2e-16 ***
## bac_shannon  2.731e-04  6.952e-05   3.928 0.000428 ***
## siteMSE     -1.470e-04  8.560e-05  -1.718 0.095546 .  
## siteTNW      2.685e-04  9.136e-05   2.939 0.006069 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0002074 on 32 degrees of freedom
## Multiple R-squared:  0.4421, Adjusted R-squared:  0.3898 
## F-statistic: 8.452 on 3 and 32 DF,  p-value: 0.0002804
plot(a1)

lme1 <- lme(host_het ~ bac_shannon,data=mast.het.bac.fore,random=~1|site)
summary(lme1)
## Linear mixed-effects model fit by REML
##   Data: mast.het.bac.fore 
##         AIC       BIC   logLik
##   -461.4152 -455.3098 234.7076
## 
## Random effects:
##  Formula: ~1 | site
##          (Intercept)     Residual
## StdDev: 0.0001994764 0.0002074846
## 
## Fixed effects:  host_het ~ bac_shannon 
##                    Value    Std.Error DF   t-value p-value
## (Intercept) 0.0030240437 1.741088e-04 32 17.368698   0e+00
## bac_shannon 0.0002596103 6.887735e-05 32  3.769168   7e-04
##  Correlation: 
##             (Intr)
## bac_shannon -0.723
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.7893026 -0.4653924 -0.1378766  0.5074773  2.7147133 
## 
## Number of Observations: 36
## Number of Groups: 3
#subset back reef
shapiro.test(log(mast.het.bac.back$host_het)) #good
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast.het.bac.back$host_het)
## W = 0.94907, p-value = 0.1152
shapiro.test(mast.het.bac.back$bac_shannon) #good
## 
##  Shapiro-Wilk normality test
## 
## data:  mast.het.bac.back$bac_shannon
## W = 0.95698, p-value = 0.1983
a1 <- lm(log(host_het)~bac_shannon,data=mast.het.bac.back)
summary(a1) #ns
## 
## Call:
## lm(formula = log(host_het) ~ bac_shannon, data = mast.het.bac.back)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.16878 -0.07793 -0.00581  0.02756  0.18648 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.621452   0.060437 -93.013   <2e-16 ***
## bac_shannon -0.003603   0.032427  -0.111    0.912    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09078 on 32 degrees of freedom
## Multiple R-squared:  0.0003857,  Adjusted R-squared:  -0.03085 
## F-statistic: 0.01235 on 1 and 32 DF,  p-value: 0.9122
plot(a1)

#looks good:
# gg.het.shan <- ggplot(data=mast,aes(x=het.t,y=bac_shannon,color=site,linetype=zone,shape=zone))+
#   geom_point()+
#   stat_smooth(method="lm",se=FALSE)+
#   theme_cowplot()+
#   xlab(" ")+
#   ylab("Shannon index")+
#   scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
#   scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
#   scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))
# gg.het.shan

#wrapped by zone
gg.het.shan <- ggplot(data=mast,aes(x=host_het,y=bac_shannon))+
  geom_point(aes(color=site,shape=site))+
  facet_wrap(~zone)+
  stat_smooth(method="lm",color="thistle4")+
  theme_cowplot()+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_linetype_manual(name="Site",values=c("solid","dotted","dotdash"),labels=c("MNW","MSE","TNW"))+
  scale_shape_manual(name="Site",values=c(8,4,9),labels=c("MNW","MSE","TNW"))+
  ylab("Shannon index")+
  theme(axis.text.x = element_text(angle = 45, hjust=1))+
  xlab("Host heterozygosity")+
  xlim(0.0030,0.00433)
gg.het.shan
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).

2.4.1.3 Simpson

shapiro.test(log(mast$bac_simpson))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast$bac_simpson)
## W = 0.97101, p-value = 0.0548
a1 <- lm(het.t~log(bac_simpson)+site/zone,data=mast)
summary(a1) #p < 0.05
## 
## Call:
## lm(formula = het.t ~ log(bac_simpson) + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.65551 -0.58018  0.00207  0.48132  2.20585 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           -1.16571    0.47039  -2.478   0.0159 *
## log(bac_simpson)       0.68735    0.26610   2.583   0.0121 *
## siteMSE                0.33167    0.40431   0.820   0.4151  
## siteTNW                0.85611    0.38806   2.206   0.0310 *
## siteMNW:zoneFore reef  0.05197    0.40174   0.129   0.8975  
## siteMSE:zoneFore reef -0.72255    0.35357  -2.044   0.0452 *
## siteTNW:zoneFore reef -0.05746    0.34577  -0.166   0.8685  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8568 on 63 degrees of freedom
##   (73 observations deleted due to missingness)
## Multiple R-squared:  0.2365, Adjusted R-squared:  0.1638 
## F-statistic: 3.253 on 6 and 63 DF,  p-value: 0.00754
#subset fore reef
shapiro.test(mast.het.bac.fore$host_het)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast.het.bac.fore$host_het
## W = 0.95932, p-value = 0.2048
shapiro.test(log(mast.het.bac.fore$bac_simpson))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast.het.bac.fore$bac_simpson)
## W = 0.91649, p-value = 0.00994
simp.t <- bestNormalize(mast.het.bac.fore$bac_simpson)
mast.het.bac.fore$simp.t <- simp.t$x.t
#Standardized Box Cox Transformation with 36 nonmissing obs.:
a1 <- lm(host_het~simp.t+site,data=mast.het.bac.fore)
summary(a1) #p < 0.001
## 
## Call:
## lm(formula = host_het ~ simp.t + site, data = mast.het.bac.fore)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -3.739e-04 -1.075e-04 -6.600e-07  1.165e-04  5.745e-04 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.442e-03  6.503e-05  52.933  < 2e-16 ***
## simp.t       1.504e-04  4.138e-05   3.634 0.000968 ***
## siteMSE     -1.273e-04  8.724e-05  -1.460 0.154120    
## siteTNW      2.981e-04  9.715e-05   3.068 0.004364 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0002125 on 32 degrees of freedom
## Multiple R-squared:  0.4146, Adjusted R-squared:  0.3597 
## F-statistic: 7.554 on 3 and 32 DF,  p-value: 0.0005888
plot(a1)

lme1 <- lme(host_het ~ bac_simpson,data=mast.het.bac.fore,random=~1|site)
summary(lme1)
## Linear mixed-effects model fit by REML
##   Data: mast.het.bac.fore 
##         AIC       BIC   logLik
##   -454.5606 -448.4552 231.2803
## 
## Random effects:
##  Formula: ~1 | site
##          (Intercept)     Residual
## StdDev: 0.0001730851 0.0002223005
## 
## Fixed effects:  host_het ~ bac_simpson 
##                   Value    Std.Error DF   t-value p-value
## (Intercept) 0.003332809 1.210956e-04 32 27.522145   0.000
## bac_simpson 0.000049354 1.711503e-05 32  2.883682   0.007
##  Correlation: 
##             (Intr)
## bac_simpson -0.474
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.5792155 -0.4097195 -0.1950739  0.6642891  2.5979382 
## 
## Number of Observations: 36
## Number of Groups: 3
model.k <- mblm(het.t ~ bac_simpson, data=mast.het.bac.fore)
summary(model.k)
## Warning in wilcox.test.default(z$intercepts): cannot compute exact p-value with
## ties
## Warning in wilcox.test.default(z$slopes): cannot compute exact p-value with ties
## Warning in wilcox.test.default(z$intercepts): cannot compute exact p-value with
## ties
## Warning in wilcox.test.default(z$slopes): cannot compute exact p-value with ties
## 
## Call:
## mblm(formula = het.t ~ bac_simpson, dataframe = mast.het.bac.fore)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.22179 -0.70184 -0.02041  0.44400  2.28997 
## 
## Coefficients:
##             Estimate     MAD V value Pr(>|V|)    
## (Intercept)  -1.1297  1.0580      96 0.000203 ***
## bac_simpson   0.2627  0.3199     508 0.006114 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9359 on 34 degrees of freedom
#subset back reef
shapiro.test(log(mast.het.bac.back$host_het)) #good
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast.het.bac.back$host_het)
## W = 0.94907, p-value = 0.1152
shapiro.test(mast.het.bac.back$bac_simpson) #good
## 
##  Shapiro-Wilk normality test
## 
## data:  mast.het.bac.back$bac_simpson
## W = 0.96632, p-value = 0.3676
a1 <- lm(log(host_het)~bac_simpson+site,data=mast.het.bac.back)
summary(a1) #ns
## 
## Call:
## lm(formula = log(host_het) ~ bac_simpson + site, data = mast.het.bac.back)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.14597 -0.06611 -0.01355  0.04203  0.17638 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.654506   0.070935 -79.713   <2e-16 ***
## bac_simpson  0.000522   0.016047   0.033    0.974    
## siteMSE      0.004211   0.045196   0.093    0.926    
## siteTNW      0.056901   0.042154   1.350    0.187    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08935 on 30 degrees of freedom
## Multiple R-squared:  0.09225,    Adjusted R-squared:  0.001471 
## F-statistic: 1.016 on 3 and 30 DF,  p-value: 0.3993
plot(a1)

#looks good:
# gg.het.shan <- ggplot(data=mast,aes(x=het.t,y=bac_shannon,color=site,linetype=zone,shape=zone))+
#   geom_point()+
#   stat_smooth(method="lm",se=FALSE)+
#   theme_cowplot()+
#   xlab(" ")+
#   ylab("Shannon index")+
#   scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
#   scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
#   scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))
# gg.het.shan

#wrapped by zone
gg.het.simp <- ggplot(data=mast,aes(x=host_het,y=bac_simpson))+
  geom_point(aes(color=site,shape=site))+
  facet_wrap(~zone)+
  stat_smooth(method="lm",color="thistle4")+
  theme_cowplot()+
  theme(axis.text.x = element_text(angle = 45, hjust=1))+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  #scale_linetype_manual(name="Site",values=c("solid","dotted","dotdash"),labels=c("MNW","MSE","TNW"))+
  scale_shape_manual(name="Site",values=c(8,4,9),labels=c("MNW","MSE","TNW"))+
  ylab("Simpson's index")+
  xlab("Host heterozygosity")+
  xlim(0.0030,0.00433)
gg.het.simp
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).

2.4.1.4 Richness

shapiro.test(log(mast$bac_rich))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast$bac_rich)
## W = 0.98642, p-value = 0.5262
a1 <- lm(het.t~log(bac_rich)+site/zone,data=mast)
summary(a1) #ns
## 
## Call:
## lm(formula = het.t ~ log(bac_rich) + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.75827 -0.68668 -0.03266  0.56815  1.83602 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           -1.883792   1.476813  -1.276   0.2068  
## log(bac_rich)          0.386405   0.338355   1.142   0.2578  
## siteMSE                0.197491   0.422911   0.467   0.6421  
## siteTNW                0.788421   0.413572   1.906   0.0612 .
## siteMNW:zoneFore reef -0.009629   0.419931  -0.023   0.9818  
## siteMSE:zoneFore reef -0.662011   0.383936  -1.724   0.0896 .
## siteTNW:zoneFore reef -0.162770   0.360370  -0.452   0.6531  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8918 on 63 degrees of freedom
##   (73 observations deleted due to missingness)
## Multiple R-squared:  0.1728, Adjusted R-squared:  0.094 
## F-statistic: 2.193 on 6 and 63 DF,  p-value: 0.0552
#wrapped by zone
gg.het.rich <- ggplot(data=mast,aes(x=host_het,y=bac_rich))+
  geom_point(aes(color=site,shape=site))+
  facet_wrap(~zone)+
  stat_smooth(method="lm",color="thistle4")+
  #stat_smooth(method="lm",aes(color=site,linetype=site))+
  theme_cowplot()+
  theme(axis.text.x = element_text(angle = 45, hjust=1))+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_linetype_manual(name="Site",values=c("solid","dotted","dotdash"),labels=c("MNW","MSE","TNW"))+
  scale_shape_manual(name="Site",values=c(8,4,9),labels=c("MNW","MSE","TNW"))+
  ylab("Bact. richness")+
  xlab("Host heterozygosity")
gg.het.rich
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).

# gg.het.rich <- ggplot(data=mast,aes(x=het.t,y=log(bac_rich),color=site,linetype=zone,shape=zone))+
#   geom_point()+
#   stat_smooth(method="lm",se=FALSE)+
#   theme_cowplot()+
#   #xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
#   ylab("ASV richness")+
#   scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
#   scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
#   scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))+
#   theme(axis.title.x=element_blank())
# gg.het.rich

2.4.1.5 Faith’s D

shapiro.test(log(mast$bac_phylo))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast$bac_phylo)
## W = 0.98859, p-value = 0.6734
a1 <- lm(het.t~log(bac_phylo)+site/zone,data=mast)
summary(a1) #ns
## 
## Call:
## lm(formula = het.t ~ log(bac_phylo) + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.69723 -0.67985 -0.02846  0.57500  1.76471 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           -1.51515    1.34623  -1.125   0.2647  
## log(bac_phylo)         0.44951    0.45991   0.977   0.3321  
## siteMSE                0.10113    0.40970   0.247   0.8058  
## siteTNW                0.73665    0.40626   1.813   0.0746 .
## siteMNW:zoneFore reef -0.07387    0.41575  -0.178   0.8595  
## siteMSE:zoneFore reef -0.59531    0.37084  -1.605   0.1134  
## siteTNW:zoneFore reef -0.18926    0.35852  -0.528   0.5994  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8942 on 63 degrees of freedom
##   (73 observations deleted due to missingness)
## Multiple R-squared:  0.1683, Adjusted R-squared:  0.08906 
## F-statistic: 2.124 on 6 and 63 DF,  p-value: 0.06271
gg.het.phyl <- ggplot(data=mast,aes(x=host_het,y=bac_phylo))+
  geom_point(aes(color=site,shape=site))+
  facet_wrap(~zone)+
  stat_smooth(method="lm",color="thistle4")+
  #stat_smooth(method="lm",aes(color=site,linetype=site))+
  theme_cowplot()+
  theme(axis.text.x = element_text(angle = 45, hjust=1))+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  #scale_linetype_manual(name="Site",values=c("solid","dotted","dotdash"),labels=c("MNW","MSE","TNW"))+
  scale_shape_manual(name="Site",values=c(8,4,9),labels=c("MNW","MSE","TNW"))+
  ylab("Bact. Faith's D")+
  xlab("Host heterozygosity")
gg.het.phyl
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).

# gg.size.phyl <- ggplot(data=mast,aes(x=size.t,y=log(bac_phylo),color=site,linetype=zone,shape=zone))+
#   geom_point()+
#   stat_smooth(method="lm",se=FALSE)+
#   theme_cowplot()+
#   xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
#   ylab("Faith's D")+
#   scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
#   scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
#   scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))
# gg.size.phyl

2.4.2 Het x bacterial diversity: synthesizing results

gg.het.bac.sig <- ggarrange(gg.het.even,gg.het.shan,gg.het.simp,nrow=3,ncol=1,labels="AUTO",common.legend=TRUE,legend="right")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).

## Warning: Removed 73 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).

## Warning: Removed 73 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).

## Warning: Removed 73 rows containing missing values (geom_point).
gg.het.bac.sig

gg.het.bac.ns <- ggarrange(gg.het.rich,gg.het.phyl,nrow=2,ncol=1,labels="AUTO",common.legend=TRUE,legend="right")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).

## Warning: Removed 73 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).

## Warning: Removed 73 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).

## Warning: Removed 73 rows containing missing values (geom_point).
gg.het.bac.ns

#ggsave(gg.het.bac.sig,file="het.bac.sig.pdf",height=8,width=4.5)
#ggsave(gg.het.bac.ns,file="het.bac.ns.pdf")
#subset for correlations
host.bac <- mast[,c(7:8,12:16)]

chart.Correlation(host.bac, histogram=TRUE, pch=19)

# summary(lm(host_het~bac_rich,data=mast))
# summary(lm(host_het~bac_even,data=mast))
# summary(lm(host_het~bac_phylo,data=mast))
# summary(lm(host_het~bac_shannon,data=mast))
# summary(lm(host_het~bac_simpson,data=mast))
# 
# summary(lm(size_cm2~bac_rich,data=mast))
# summary(lm(size_cm2~bac_even,data=mast))
# summary(lm(size_cm2~bac_phylo,data=mast))
# summary(lm(size_cm2~bac_shannon,data=mast))
# summary(lm(size_cm2~bac_simpson,data=mast))

#other one - looks cool
#install.packages("ggcorrplot")
# library(ggcorrplot)
# library(dplyr)
# 
# model.matrix(~0+., data=host.bac) %>%
#   cor(use="pairwise.complete.obs") %>%
#   ggcorrplot(show.diag = F, type="lower", lab=TRUE, lab_size=2)

ggplot(data=mast,aes(x=log(host_het),y=log(bac_even),color=zone))+
  geom_point()+
  facet_wrap(~site)+
  stat_smooth(method="lm")+
  theme_cowplot()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).
## Warning: Removed 73 rows containing missing values (geom_point).

ggplot(data=mast,aes(x=log(host_het),y=log(bac_shannon),color=site))+
  geom_point()+
  facet_wrap(~zone)+
  stat_smooth(method="lm")+
  theme_cowplot()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).

## Warning: Removed 73 rows containing missing values (geom_point).

ggplot(data=mast,aes(x=log(host_het),y=log(bac_simpson),color=site))+
  geom_point()+
  facet_wrap(~zone)+
  stat_smooth(method="lm")+
  theme_cowplot()
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 73 rows containing non-finite values (stat_smooth).

## Warning: Removed 73 rows containing missing values (geom_point).

shapiro.test(mast$bac_even)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$bac_even
## W = 0.98352, p-value = 0.3597
a1 <- lm(host_het~bac_even+site/zone,data=mast)
summary(a1)
## 
## Call:
## lm(formula = host_het ~ bac_even + site/zone, data = mast)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -5.312e-04 -1.782e-04 -2.149e-05  1.369e-04  7.809e-04 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            3.081e-03  2.063e-04  14.934   <2e-16 ***
## bac_even               8.599e-04  3.514e-04   2.447   0.0172 *  
## siteMSE                9.774e-05  1.320e-04   0.740   0.4618    
## siteTNW                2.656e-04  1.256e-04   2.115   0.0384 *  
## siteMNW:zoneFore reef -2.231e-06  1.298e-04  -0.017   0.9863    
## siteMSE:zoneFore reef -2.073e-04  1.130e-04  -1.834   0.0713 .  
## siteTNW:zoneFore reef -5.737e-05  1.098e-04  -0.522   0.6032    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0002771 on 63 degrees of freedom
##   (73 observations deleted due to missingness)
## Multiple R-squared:  0.2292, Adjusted R-squared:  0.1558 
## F-statistic: 3.122 on 6 and 63 DF,  p-value: 0.009645
qqPlot(a1)

## [1] 17 48
plot(a1)

# 
# shapiro.test(mast.mnw$bac_even)
# 
# l2 <- lm(host_het~bac_even+zone,data=mast.mnw)
# summary(l2)
# 
# l3 <- lm(host_het~bac_even+zone,data=mast.mse)
# summary(l3)
# 
# l4 <- lm(host_het~bac_even+zone,data=mast.tnw)
# summary(l4)
# 
# qqPlot(m1)

shapiro.test(mast$bac_shannon)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$bac_shannon
## W = 0.9918, p-value = 0.8799
a1 <- lm(het.t~bac_shannon+site/zone,data=mast)
summary(a1)
## 
## Call:
## lm(formula = het.t ~ bac_shannon + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.86200 -0.63479 -0.03502  0.55229  2.16320 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           -1.48196    0.57654  -2.570   0.0125 *
## bac_shannon            0.56438    0.22210   2.541   0.0135 *
## siteMSE                0.40134    0.41324   0.971   0.3352  
## siteTNW                0.91958    0.39489   2.329   0.0231 *
## siteMNW:zoneFore reef  0.07604    0.40387   0.188   0.8513  
## siteMSE:zoneFore reef -0.75545    0.35784  -2.111   0.0387 *
## siteTNW:zoneFore reef -0.12715    0.34141  -0.372   0.7108  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8581 on 63 degrees of freedom
##   (73 observations deleted due to missingness)
## Multiple R-squared:  0.2342, Adjusted R-squared:  0.1612 
## F-statistic:  3.21 on 6 and 63 DF,  p-value: 0.008166
qqPlot(a1)

## [1] 17 48
plot(a1)

shapiro.test(log(mast$bac_simpson))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast$bac_simpson)
## W = 0.97101, p-value = 0.0548
a1 <- lm(het.t~log(bac_simpson)+site/zone,data=mast)
summary(a1)
## 
## Call:
## lm(formula = het.t ~ log(bac_simpson) + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.65551 -0.58018  0.00207  0.48132  2.20585 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           -1.16571    0.47039  -2.478   0.0159 *
## log(bac_simpson)       0.68735    0.26610   2.583   0.0121 *
## siteMSE                0.33167    0.40431   0.820   0.4151  
## siteTNW                0.85611    0.38806   2.206   0.0310 *
## siteMNW:zoneFore reef  0.05197    0.40174   0.129   0.8975  
## siteMSE:zoneFore reef -0.72255    0.35357  -2.044   0.0452 *
## siteTNW:zoneFore reef -0.05746    0.34577  -0.166   0.8685  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8568 on 63 degrees of freedom
##   (73 observations deleted due to missingness)
## Multiple R-squared:  0.2365, Adjusted R-squared:  0.1638 
## F-statistic: 3.253 on 6 and 63 DF,  p-value: 0.00754
qqPlot(a1)

## [1] 17 48
plot(a1)

shapiro.test(log(mast$bac_phylo))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast$bac_phylo)
## W = 0.98859, p-value = 0.6734
a1 <- lm(het.t~log(bac_phylo)+site/zone,data=mast)
summary(a1) #ns
## 
## Call:
## lm(formula = het.t ~ log(bac_phylo) + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.69723 -0.67985 -0.02846  0.57500  1.76471 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           -1.51515    1.34623  -1.125   0.2647  
## log(bac_phylo)         0.44951    0.45991   0.977   0.3321  
## siteMSE                0.10113    0.40970   0.247   0.8058  
## siteTNW                0.73665    0.40626   1.813   0.0746 .
## siteMNW:zoneFore reef -0.07387    0.41575  -0.178   0.8595  
## siteMSE:zoneFore reef -0.59531    0.37084  -1.605   0.1134  
## siteTNW:zoneFore reef -0.18926    0.35852  -0.528   0.5994  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8942 on 63 degrees of freedom
##   (73 observations deleted due to missingness)
## Multiple R-squared:  0.1683, Adjusted R-squared:  0.08906 
## F-statistic: 2.124 on 6 and 63 DF,  p-value: 0.06271
shapiro.test(log(mast$bac_rich))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast$bac_rich)
## W = 0.98642, p-value = 0.5262
a1 <- lm(het.t~log(bac_rich)+site/zone,data=mast)
summary(a1) #ns
## 
## Call:
## lm(formula = het.t ~ log(bac_rich) + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.75827 -0.68668 -0.03266  0.56815  1.83602 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           -1.883792   1.476813  -1.276   0.2068  
## log(bac_rich)          0.386405   0.338355   1.142   0.2578  
## siteMSE                0.197491   0.422911   0.467   0.6421  
## siteTNW                0.788421   0.413572   1.906   0.0612 .
## siteMNW:zoneFore reef -0.009629   0.419931  -0.023   0.9818  
## siteMSE:zoneFore reef -0.662011   0.383936  -1.724   0.0896 .
## siteTNW:zoneFore reef -0.162770   0.360370  -0.452   0.6531  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8918 on 63 degrees of freedom
##   (73 observations deleted due to missingness)
## Multiple R-squared:  0.1728, Adjusted R-squared:  0.094 
## F-statistic: 2.193 on 6 and 63 DF,  p-value: 0.0552

2.4.3 Colony size x bacterial diversity

size.t <- bestNormalize(mast$size_cm2)
#Standardized Yeo-Johnson Transformation with 109 nonmissing obs.
mast$size.t <- size.t$x.t
shapiro.test(mast$size.t)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$size.t
## W = 0.98683, p-value = 0.3641

2.4.3.1 Evenness

shapiro.test(mast$bac_even)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$bac_even
## W = 0.98352, p-value = 0.3597
a1 <- lm(size.t~bac_even+site/zone,data=mast)
summary(a1) #ns
## 
## Call:
## lm(formula = size.t ~ bac_even + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.68250 -0.51649 -0.01966  0.63430  1.54280 
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.8624     0.5042   1.710   0.0921 .  
## bac_even                0.2461     0.9765   0.252   0.8018    
## siteMSE                -0.7466     0.3133  -2.383   0.0202 *  
## siteTNW                -0.7863     0.3108  -2.530   0.0139 *  
## siteMNW:zoneFore reef  -1.3211     0.3160  -4.181 9.14e-05 ***
## siteMSE:zoneFore reef       NA         NA      NA       NA    
## siteTNW:zoneFore reef  -0.8049     0.3227  -2.494   0.0153 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8339 on 63 degrees of freedom
##   (74 observations deleted due to missingness)
## Multiple R-squared:  0.3276, Adjusted R-squared:  0.2742 
## F-statistic: 6.138 on 5 and 63 DF,  p-value: 0.0001079
#all info
gg.size.even <- ggplot(data=mast,aes(x=size.t,y=bac_even,color=site,linetype=zone,shape=zone))+
  geom_point()+
  stat_smooth(method="lm",se=FALSE)+
  theme_cowplot()+
  #xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
  ylab("Evenness")+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
  scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))+
  theme(axis.title.x=element_blank())
gg.size.even
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).

# #wrapped by site
# ggplot(data=mast,aes(x=log(size_cm2),y=log(bac_even),color=zone))+
#   geom_point()+
#   facet_wrap(~site)+
#   stat_smooth(method="lm")+
#   theme_cowplot()
# 
# #wrapped by zone
# ggplot(data=mast,aes(x=log(size_cm2),y=log(bac_even),color=site))+
#   geom_point()+
#   stat_smooth(method="lm")+
#   facet_wrap(~zone)+
#   theme_cowplot()

2.4.3.2 Shannon

shapiro.test(mast$bac_shannon)
## 
##  Shapiro-Wilk normality test
## 
## data:  mast$bac_shannon
## W = 0.9918, p-value = 0.8799
a1 <- lm(size.t~bac_shannon+site/zone,data=mast)
summary(a1) #ns
## 
## Call:
## lm(formula = size.t ~ bac_shannon + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.67243 -0.51293 -0.01862  0.63926  1.52001 
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            0.89713    0.46180   1.943   0.0565 .  
## bac_shannon            0.04028    0.20518   0.196   0.8450    
## siteMSE               -0.74487    0.31725  -2.348   0.0220 *  
## siteTNW               -0.78193    0.31457  -2.486   0.0156 *  
## siteMNW:zoneFore reef -1.31809    0.31669  -4.162 9.75e-05 ***
## siteMSE:zoneFore reef       NA         NA      NA       NA    
## siteTNW:zoneFore reef -0.80450    0.32399  -2.483   0.0157 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.834 on 63 degrees of freedom
##   (74 observations deleted due to missingness)
## Multiple R-squared:  0.3273, Adjusted R-squared:  0.2739 
## F-statistic: 6.131 on 5 and 63 DF,  p-value: 0.0001092
gg.size.shan <- ggplot(data=mast,aes(x=size.t,y=bac_shannon,color=site,linetype=zone,shape=zone))+
  geom_point()+
  stat_smooth(method="lm",se=FALSE)+
  theme_cowplot()+
  xlab(" ")+
  ylab("Shannon index")+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
  scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))
gg.size.shan
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).

2.4.3.3 Simpson

shapiro.test(log(mast$bac_simpson))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast$bac_simpson)
## W = 0.97101, p-value = 0.0548
a1 <- lm(size.t~log(bac_simpson)+site/zone,data=mast)
summary(a1) #ns
## 
## Call:
## lm(formula = size.t ~ log(bac_simpson) + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.66530 -0.51652 -0.02691  0.60941  1.54948 
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            0.87065    0.38976   2.234   0.0291 *  
## log(bac_simpson)       0.09246    0.28167   0.328   0.7438    
## siteMSE               -0.74323    0.31309  -2.374   0.0207 *  
## siteTNW               -0.78307    0.31109  -2.517   0.0144 *  
## siteMNW:zoneFore reef -1.32193    0.31587  -4.185    9e-05 ***
## siteMSE:zoneFore reef       NA         NA      NA       NA    
## siteTNW:zoneFore reef -0.78724    0.33035  -2.383   0.0202 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8336 on 63 degrees of freedom
##   (74 observations deleted due to missingness)
## Multiple R-squared:  0.3281, Adjusted R-squared:  0.2747 
## F-statistic: 6.151 on 5 and 63 DF,  p-value: 0.0001058
gg.size.simp <- ggplot(data=mast,aes(x=size.t,y=log(bac_simpson),color=site,linetype=zone,shape=zone))+
  geom_point()+
  stat_smooth(method="lm",se=FALSE)+
  theme_cowplot()+
  xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
  ylab("Inv. Simpson index")+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
  scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))+
  ylim(0,2.25)
gg.size.simp
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).

2.4.3.4 Richness

shapiro.test(log(mast$bac_rich))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast$bac_rich)
## W = 0.98642, p-value = 0.5262
a1 <- lm(size.t~log(bac_rich)+site/zone,data=mast)
summary(a1) #ns
## 
## Call:
## lm(formula = size.t ~ log(bac_rich) + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.65729 -0.51673 -0.01987  0.64024  1.47864 
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            0.85395    1.39658   0.611 0.543098    
## log(bac_rich)          0.02941    0.32902   0.089 0.929046    
## siteMSE               -0.75055    0.32152  -2.334 0.022779 *  
## siteTNW               -0.78398    0.32432  -2.417 0.018543 *  
## siteMNW:zoneFore reef -1.31821    0.31905  -4.132 0.000108 ***
## siteMSE:zoneFore reef       NA         NA      NA       NA    
## siteTNW:zoneFore reef -0.80599    0.33014  -2.441 0.017453 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8342 on 63 degrees of freedom
##   (74 observations deleted due to missingness)
## Multiple R-squared:  0.327,  Adjusted R-squared:  0.2736 
## F-statistic: 6.122 on 5 and 63 DF,  p-value: 0.0001107
gg.size.rich <- ggplot(data=mast,aes(x=size.t,y=log(bac_rich),color=site,linetype=zone,shape=zone))+
  geom_point()+
  stat_smooth(method="lm",se=FALSE)+
  theme_cowplot()+
  #xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
  ylab("ASV richness")+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
  scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))+
  theme(axis.title.x=element_blank())
gg.size.rich
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).

2.4.3.5 Faith’s D

shapiro.test(log(mast$bac_phylo))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(mast$bac_phylo)
## W = 0.98859, p-value = 0.6734
a1 <- lm(size.t~log(bac_phylo)+site/zone,data=mast)
summary(a1) #ns
## 
## Call:
## lm(formula = size.t ~ log(bac_phylo) + site/zone, data = mast)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.65445 -0.48462  0.00767  0.63242  1.48410 
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            0.02084    1.25056   0.017   0.9868    
## log(bac_phylo)         0.33734    0.43453   0.776   0.4405    
## siteMSE               -0.73271    0.31029  -2.361   0.0213 *  
## siteTNW               -0.73039    0.31876  -2.291   0.0253 *  
## siteMNW:zoneFore reef -1.31247    0.31488  -4.168 9.54e-05 ***
## siteMSE:zoneFore reef       NA         NA      NA       NA    
## siteTNW:zoneFore reef -0.76337    0.32608  -2.341   0.0224 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8303 on 63 degrees of freedom
##   (74 observations deleted due to missingness)
## Multiple R-squared:  0.3333, Adjusted R-squared:  0.2804 
## F-statistic: 6.299 on 5 and 63 DF,  p-value: 8.452e-05
gg.size.phyl <- ggplot(data=mast,aes(x=size.t,y=log(bac_phylo),color=site,linetype=zone,shape=zone))+
  geom_point()+
  stat_smooth(method="lm",se=FALSE)+
  theme_cowplot()+
  xlab("Colony size (cm^2, Yeo-Johns. trans.)")+
  ylab("Faith's D")+
  scale_color_manual(name="Site",values=c("darkslategray3","darkslategray4","#000004"))+
  scale_shape_manual(name="Reef zone",values=c(16,15),labels=c("BR","FR"))+
  scale_linetype_manual(name="Reef zone",values=c("solid","dashed"),labels=c("BR","FR"))
gg.size.phyl
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).

2.4.4 Size x bacterial diversity: synthesizing results

gg.size.bac.all <- ggarrange(gg.size.rich,gg.size.even,gg.size.shan,gg.size.simp,gg.size.phyl,nrow=3,ncol=2,labels="AUTO",common.legend=TRUE,legend="right")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).
## Warning: Removed 74 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).

## Warning: Removed 74 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).

## Warning: Removed 74 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).

## Warning: Removed 74 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).

## Warning: Removed 74 rows containing missing values (geom_point).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 74 rows containing non-finite values (stat_smooth).

## Warning: Removed 74 rows containing missing values (geom_point).
gg.size.bac.all

#ggsave(gg.size.bac.all,file="size.bac.pdf",width=7,height=8)

2.5 Bac x sym

##the sammple sizes across syms are crazy uneven

#plot(bac_rich~sym_type,data=mast) #ns
# summary(aov(bac_rich~sym_type_top,data=mast)) #ns
# plot(bac_phylo~sym_type,data=mast) #ns
# summary(aov(bac_phylo~sym_type_top,data=mast)) #ns

#mast$bac<- complete.cases(mast$bac_even)

#sample numbers:
# plot(mast$sym_type_top)
# 
#plot(bac_even~sym_type,data=mast) 
# a.sym.bac.even <- aov(bac_even~sym_type_top,data=mast) #sig but A3 is the most different & it has almost no samples
# summary(a.sym.bac.even)
# TukeyHSD(a.sym.bac.even)
# 
 plot(bac_shannon~sym_type,data=mast) #

#.sym.bac.shan <- aov(bac_shannon~sym_type_top,data=mast) #sig but A3 is the most different & it has almost no samples
# summary(a.sym.bac.shan) #sig
# TukeyHSD(a.sym.bac.shan)
# 
plot(bac_simpson~sym_type,data=mast) #

# a.sym.bac.simp <- aov(bac_simpson~sym_type_top,data=mast) #sig but A3 is the most different & it has almost no samples
# summary(a.sym.bac.simp) #sig
# TukeyHSD(a.sym.bac.simp)

plot(bac_rich~sym_type,data=mast)

plot(bac_even~sym_type,data=mast)

plot(bac_phylo~sym_type,data=mast)

3 Not using right now

host.bac <- mast[,c(7:8,12:16)]

chart.Correlation(host.bac, histogram=TRUE, pch=19)

host.bac.mnw <- mast.mnw[,c(7:8,12:16)]
chart.Correlation(host.bac.mnw, histogram=TRUE, pch=19)

host.bac.mse <- mast.mse[,c(7:8,12:16)]
chart.Correlation(host.bac.mse, histogram=TRUE, pch=19)

host.bac.tnw <- mast.tnw[,c(7:8,12:16)]
chart.Correlation(host.bac.tnw, histogram=TRUE, pch=19)

summary(lm(log(host_het)~log(bac_even),data=mast.mnw)) #ns
summary(lm(log(host_het)~log(bac_even),data=mast.mse)) #ns
summary(lm(log(host_het)~log(bac_even),data=mast.tnw)) #ns

summary(lm(host_het~bac_even,data=mast.mnw)) #ns
summary(lm(host_het~bac_even,data=mast.mse)) #ns
summary(lm(host_het~bac_even,data=mast.tnw)) #ns

# library(nlme)
# 
# bac <- mast[complete.cases(mast$bac_even),]
# host.bac <- bac[complete.cases(bac$host_het),]
# 
# host.bac.mnw <- subset(host.bac,site=="MNW")
# 
# lme.host.bac <- lme(host_het~bac_even+site+zone,data=host.bac,random=~1)
# summary(lm(host_het~bac_even+site+zone,data=host.bac))
# plot(lme.host.bac)
# anova <- anova(lme.host.bac)
# anova
# 
# summary(lm(host_het~bac_even,data=host.bac.mnw,random=~1|zone))

4 RDA

library(vegan)
data(mite)
data(mite.env)
data(mite.pcnm)

# Two explanatory data frames -- Hellinger-transform Y
mod <- varpart(mite, mite.env, mite.pcnm, transfo="hel")
mod
## 
## Partition of variance in RDA 
## 
## Call: varpart(Y = mite, X = mite.env, mite.pcnm, transfo = "hel")
## Species transformation:  hellinger
## Explanatory tables:
## X1:  mite.env
## X2:  mite.pcnm 
## 
## No. of explanatory tables: 2 
## Total variation (SS): 27.205 
##             Variance: 0.39428 
## No. of observations: 70 
## 
## Partition table:
##                      Df R.squared Adj.R.squared Testable
## [a+b] = X1           11   0.52650       0.43670     TRUE
## [b+c] = X2           22   0.62300       0.44653     TRUE
## [a+b+c] = X1+X2      33   0.75893       0.53794     TRUE
## Individual fractions                                    
## [a] = X1|X2          11                 0.09141     TRUE
## [b]                   0                 0.34530    FALSE
## [c] = X2|X1          22                 0.10124     TRUE
## [d] = Residuals                         0.46206    FALSE
## ---
## Use function 'rda' to test significance of fractions of interest
## Use fill colours
showvarparts(2, bg = c("hotpink","skyblue"))

plot(mod, bg = c("hotpink","skyblue"))

## Test fraction [a] using partial RDA, '~ .' in formula tells to use
## all variables of data mite.env.
aFrac <- rda(decostand(mite, "hel"), mite.env, mite.pcnm)
anova(aFrac)
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(X = decostand(mite, "hel"), Y = mite.env, Z = mite.pcnm)
##          Df Variance      F Pr(>F)    
## Model    11 0.053592 1.8453  0.001 ***
## Residual 36 0.095050                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RsquareAdj gives the same result as component [a] of varpart
RsquareAdj(aFrac)
## $r.squared
## [1] 0.1359251
## 
## $adj.r.squared
## [1] 0.09140797
## Partition Bray-Curtis dissimilarities
varpart(vegdist(mite), mite.env, mite.pcnm)
## 
## Partition of squared Bray distance in dbRDA 
## 
## Call: varpart(Y = vegdist(mite), X = mite.env, mite.pcnm)
## 
## Explanatory tables:
## X1:  mite.env
## X2:  mite.pcnm 
## 
## No. of explanatory tables: 2 
## Total variation (SS): 14.696 
## No. of observations: 70 
## 
## Partition table:
##                      Df R.squared Adj.R.squared Testable
## [a+b] = X1           11   0.50512       0.41127     TRUE
## [b+c] = X2           22   0.60144       0.41489     TRUE
## [a+b+c] = X1+X2      33   0.74631       0.51375     TRUE
## Individual fractions                                    
## [a] = X1|X2          11                 0.09887     TRUE
## [b]                   0                 0.31240    FALSE
## [c] = X2|X1          22                 0.10249     TRUE
## [d] = Residuals                         0.48625    FALSE
## ---
## Use function 'dbrda' to test significance of fractions of interest
## Three explanatory tables with formula interface
mod <- varpart(mite, ~ SubsDens + WatrCont, ~ Substrate + Shrub + Topo,
   mite.pcnm, data=mite.env, transfo="hel")
mod
## 
## Partition of variance in RDA 
## 
## Call: varpart(Y = mite, X = ~SubsDens + WatrCont, ~Substrate + Shrub +
## Topo, mite.pcnm, data = mite.env, transfo = "hel")
## Species transformation:  hellinger
## Explanatory tables:
## X1:  ~SubsDens + WatrCont
## X2:  ~Substrate + Shrub + Topo
## X3:  mite.pcnm 
## 
## No. of explanatory tables: 3 
## Total variation (SS): 27.205 
##             Variance: 0.39428 
## No. of observations: 70 
## 
## Partition table:
##                       Df R.square Adj.R.square Testable
## [a+d+f+g] = X1         2  0.32677      0.30667     TRUE
## [b+d+e+g] = X2         9  0.40395      0.31454     TRUE
## [c+e+f+g] = X3        22  0.62300      0.44653     TRUE
## [a+b+d+e+f+g] = X1+X2 11  0.52650      0.43670     TRUE
## [a+c+d+e+f+g] = X1+X3 24  0.67372      0.49970     TRUE
## [b+c+d+e+f+g] = X2+X3 31  0.72400      0.49884     TRUE
## [a+b+c+d+e+f+g] = All 33  0.75893      0.53794     TRUE
## Individual fractions                                   
## [a] = X1 | X2+X3       2               0.03910     TRUE
## [b] = X2 | X1+X3       9               0.03824     TRUE
## [c] = X3 | X1+X2      22               0.10124     TRUE
## [d]                    0               0.01407    FALSE
## [e]                    0               0.09179    FALSE
## [f]                    0               0.08306    FALSE
## [g]                    0               0.17045    FALSE
## [h] = Residuals                        0.46206    FALSE
## Controlling 1 table X                                  
## [a+d] = X1 | X3        2               0.05317     TRUE
## [a+f] = X1 | X2        2               0.12216     TRUE
## [b+d] = X2 | X3        9               0.05231     TRUE
## [b+e] = X2 | X1        9               0.13003     TRUE
## [c+e] = X3 | X1       22               0.19303     TRUE
## [c+f] = X3 | X2       22               0.18429     TRUE
## ---
## Use function 'rda' to test significance of fractions of interest
showvarparts(3, bg=2:4)

plot(mod, bg=2:4)

## Use RDA to test fraction [a]
## Matrix can be an argument in formula
rda.result <- rda(decostand(mite, "hell") ~ SubsDens + WatrCont +
   Condition(Substrate + Shrub + Topo) +
   Condition(as.matrix(mite.pcnm)), data = mite.env)
anova(rda.result)
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = decostand(mite, "hell") ~ SubsDens + WatrCont + Condition(Substrate + Shrub + Topo) + Condition(as.matrix(mite.pcnm)), data = mite.env)
##          Df Variance      F Pr(>F)   
## Model     2 0.013771 2.6079  0.004 **
## Residual 36 0.095050                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Four explanatory tables
mod <- varpart(mite, ~ SubsDens + WatrCont, ~Substrate + Shrub + Topo,
  mite.pcnm[,1:11], mite.pcnm[,12:22], data=mite.env, transfo="hel")
mod
## 
## Partition of variance in RDA 
## 
## Call: varpart(Y = mite, X = ~SubsDens + WatrCont, ~Substrate + Shrub +
## Topo, mite.pcnm[, 1:11], mite.pcnm[, 12:22], data = mite.env, transfo =
## "hel")
## Species transformation:  hellinger
## Explanatory tables:
## X1:  ~SubsDens + WatrCont
## X2:  ~Substrate + Shrub + Topo
## X3:  mite.pcnm[, 1:11]
## X4:  mite.pcnm[, 12:22] 
## 
## No. of explanatory tables: 4 
## Total variation (SS): 27.205 
##             Variance: 0.39428 
## No. of observations: 70 
## 
## Partition table:
##                             Df R.square Adj.R.square Testable
## [aeghklno] = X1              2  0.32677      0.30667     TRUE
## [befiklmo] = X2              9  0.40395      0.31454     TRUE
## [cfgjlmno] = X3             11  0.53231      0.44361     TRUE
## [dhijkmno] = X4             11  0.09069     -0.08176     TRUE
## [abefghiklmno] = X1+X2      11  0.52650      0.43670     TRUE
## [acefghjklmno] = X1+X3      13  0.59150      0.49667     TRUE
## [adeghijklmno] = X1+X4      13  0.40374      0.26533     TRUE
## [bcefgijklmno] = X2+X3      20  0.63650      0.48813     TRUE
## [bdefhijklmno] = X2+X4      20  0.53338      0.34292     TRUE
## [cdfghijklmno] = X3+X4      22  0.62300      0.44653     TRUE
## [abcefghijklmno] = X1+X2+X3 22  0.67947      0.52944     TRUE
## [abdefghijklmno] = X1+X2+X4 22  0.61553      0.43557     TRUE
## [acdefghijklmno] = X1+X3+X4 24  0.67372      0.49970     TRUE
## [bcdefghijklmno] = X2+X3+X4 31  0.72400      0.49884     TRUE
## [abcdefghijklmno] = All     33  0.75893      0.53794     TRUE
## Individual fractions                                         
## [a] = X1 | X2+X3+X4          2               0.03910     TRUE
## [b] = X2 | X1+X3+X4          9               0.03824     TRUE
## [c] = X3 | X1+X2+X4         11               0.10237     TRUE
## [d] = X4 | X1+X2+X3         11               0.00850     TRUE
## [e]                          0               0.01407    FALSE
## [f]                          0               0.13200    FALSE
## [g]                          0               0.05355    FALSE
## [h]                          0               0.00220    FALSE
## [i]                          0              -0.00547    FALSE
## [j]                          0              -0.00963    FALSE
## [k]                          0              -0.00231    FALSE
## [l]                          0               0.24037    FALSE
## [m]                          0              -0.03474    FALSE
## [n]                          0               0.02730    FALSE
## [o]                          0              -0.06761    FALSE
## [p] = Residuals              0               0.46206    FALSE
## Controlling 2 tables X                                       
## [ae] = X1 | X3+X4            2               0.05317     TRUE
## [ag] = X1 | X2+X4            2               0.09265     TRUE
## [ah] = X1 | X2+X3            2               0.04131     TRUE
## [be] = X2 | X3+X4            9               0.05231     TRUE
## [bf] = X2 | X1+X4            9               0.17024     TRUE
## [bi] = X2 | X1+X3            9               0.03277     TRUE
## [cf] = X3 | X1+X4           11               0.23437     TRUE
## [cg] = X3 | X2+X4           11               0.15592     TRUE
## [cj] = X3 | X1+X2           11               0.09274     TRUE
## [dh] = X4 | X2+X3           11               0.01071     TRUE
## [di] = X4 | X1+X3           11               0.00303     TRUE
## [dj] = X4 | X1+X2           11              -0.00113     TRUE
## Controlling 1 table X                                        
## [aghn] = X1 | X2             2               0.12216     TRUE
## [aehk] = X1 | X3             2               0.05306     TRUE
## [aegl] = X1 | X4             2               0.34709     TRUE
## [bfim] = X2 | X1             9               0.13003     TRUE
## [beik] = X2 | X3             9               0.04452     TRUE
## [befl] = X2 | X4             9               0.42468     TRUE
## [cfjm] = X3 | X1            11               0.19000     TRUE
## [cgjn] = X3 | X2            11               0.17359     TRUE
## [cfgl] = X3 | X4            11               0.52830     TRUE
## [dijm] = X4 | X1            11              -0.04134     TRUE
## [dhjn] = X4 | X2            11               0.02837     TRUE
## [dhik] = X4 | X3            11               0.00292     TRUE
## ---
## Use function 'rda' to test significance of fractions of interest
plot(mod, bg=2:5)

## Show values for all partitions by putting 'cutoff' low enough:
plot(mod, cutoff = -Inf, cex = 0.7, bg=2:5)

sym.data <- read.csv("~/nicfall drive/Moorea_revisions/mrits_revised/symportal_profile_counts.csv",header=T)

sym.meta <- read.csv("~/nicfall drive/Moorea_revisions/mrits_revised/mrits_sampledata copy.csv",header=T)

sym.sub <- mast[complete.cases(mast$sym_clade),]
sym.meta$sample_num <- sym.meta$Sample

sym.merge <- merge(sym.meta,sym.sub,by="sample_num")

sym.merge.het <- sym.merge[complete.cases(sym.merge$host_het),]

sym.site <- sym.merge.het[,c(7:8)]
sym.host <- sym.merge.het[,c(18)]

sym.data.less <- sym.data[sym.data$Sample %in% c(sym.merge.het$sample_num),]
row.names(sym.data.less) <- sym.data.less$Sample
sym.data2 <- sym.data.less[,2:10]

mod <- varpart(sym.data2, sym.site, sym.host, transfo="hel")
mod

## Use fill colours
showvarparts(2, bg = c("hotpink","skyblue"))
plot(mod, bg = c("hotpink","skyblue"))
## Test fraction [a] using partial RDA, '~ .' in formula tells to use
## all variables of data mite.env.
aFrac <- rda(decostand(sym.data2, "hel"), sym.site, sym.host)
anova(aFrac)
## RsquareAdj gives the same result as component [a] of varpart
RsquareAdj(aFrac)

sym.hell <- decostand(sym.data2, 'hell')  # we are planning to do tb-RDA, this is Hellinger pre-transformation

rda(formula = sym.hell ~ site.x*zone.x, data = sym.site)